#Reputation Replication Code
#Ryan Brutger & Josh Kertzer
#Last modified: June 17, 2017

## This R file contains the code necessary to replicate the analysis in the supplementary appendix. Its companion R file ("Reputation Replication 1.R") replicates the analysis in the main text.
# All of the following analyses were carried out using R version 3.4.0 on an 2.9 GHz Intel Core i5 iMac running OS X Sierra version 10.12.5. 


################# 1. Load libraries, load and clean data ##############

#### Load data and libraries
#First, set your working directory using the setwd() command:
setwd()
data1 <- get(load("repdata1.RData")) # Data for main analysis
data2 <- get(load("repdata2.RData")) # Data for STM analysis

#Download, then load required R packages; note that the syntax in the tm and stm packages changes periodically; the code below was written to work with tm version 0.7-1 and stm 1.2.2
library(stargazer)
library(ggplot2)
library(mediation)
library(stm)
library(tm)

#### Functions for cleaning data
#Rescaling function for more easily interpretable results
rescale <- function(x){
  return((x-min(x,na.rm=TRUE))/(max(x-min(x,na.rm=TRUE),na.rm=TRUE)))
}
#Recoding function
recode <- function(variable, reverse=FALSE, maxVal, binarize=FALSE, x=NA, x2){
  if (is.factor(variable)){variable <- as.numeric(variable)}
  if (missing(maxVal)){maxVal <- max(variable, na.rm=TRUE)}
  variable[variable > maxVal] <- NA
  if (reverse){
    temp <- variable
    for (j in 1:maxVal){
      temp[which(variable==j)] <- maxVal-j+1
    }
    return(temp)
  }
  if (binarize){
    if (is.na(x)){stop("Specify the value to dichotomize on")}
    temp <- variable
    if (missing(x2)){
      i <- which(variable == x)
      k <- which(variable < x | variable > x)
    }
    else{
      i <- which(variable >= x & variable <= x2)
      k <- which(variable < x | variable > x2)
    }
    temp[i] <- 1
    temp[k] <- 0
    return(temp)
  }
  return(variable)
}

#Military assertiveness
ma1r <- recode(data1$ma1, reverse=TRUE)
ma3r <- recode(data1$ma3, reverse=TRUE)
#data1$milAssert <- (ma1r + data1$ma2 + ma3r)/3 # Variable already saved in dataset, coding scale shown here as demonstration

#Rescale and code main variables of interest for data1
data1$milAssert1 <- rescale(data1$milAssert)
data1$natChauv1 <- rescale(data1$natChauv)
data1$intTrust1 <- rescale(data1$intTrust)
data1$Republican <- ifelse(data1$polParty==1,1,ifelse(data1$polParty>1,0,NA))
data1$Democrat <- ifelse(data1$polParty==2,1,ifelse(data1$polParty==1,0,ifelse(data1$polParty>2,0,NA)))
quantile(data1$milAssert1, c(0.25, 0.75), na.rm=TRUE) 
data1$milAssert2 <- ifelse(data1$milAssert1 <= 0.42, 0,ifelse(data1$milAssert1 >= 0.66,1,NA))

#Rescale and code main variables of interest for data2

#Military assertiveness
ma1r <- recode(data2$ma1, reverse=TRUE)
ma3r <- recode(data2$ma3, reverse=TRUE)
#data1$milAssert <- (ma1r + data2$ma2 + ma3r)/3 # Variable already saved in dataset, coding scale shown here as demonstration

data2$milAssert1 <- rescale(data2$milAssert)
data2$milAssert2 <- ifelse(data2$milAssert1 <= 0.42, 0,ifelse(data2$milAssert1 >= 0.66,1,NA)) 

######## Appendix section 2, tables 1-2: sample characteristics

#SSI data
data1$White <-ifelse(data1$race==4,1,ifelse(data1$race<=3 | data1$race==5,0,NA))
data1$Male <- ifelse(data1$gender==1,1,ifelse(data1$gender==2,0,NA))
data1$age <- data1$demAgeFull + 16 
data1$age0 <- ifelse(data1$age>= 18 & data1$age <= 29,1,0)
data1$age1 <- ifelse(data1$age>= 30 & data1$age <= 44,1,0)
data1$age2 <- ifelse(data1$age >= 45 & data1$age <= 64,1,0)
data1$age3 <- ifelse(data1$age >=65,1,0)

data1$education2 <- ifelse(data1$education==1| data1$education==4 | data1$education==10, 1, 
                          ifelse(data1$education==3 | data1$education==5 | data1$education==7, 2,
                                 ifelse(data1$education==2, 3,
                                        ifelse(data1$education==6 | data1$education==8, 4, NA))) )

##Table 1 output:
round(table(data1$Male)/sum(table(data1$Male)),digits=3)[c(2,1)] #Gender
round(c((table(data1$age0)/sum(table(data1$age0)))[2], (table(data1$age1)/sum(table(data1$age1)))[2], (table(data1$age2)/sum(table(data1$age2)))[2], (table(data1$age3)/sum(table(data1$age3)))[2]), digits=3) #Age
round(table(data1$education2)/sum(table(data1$education2)),digits=3) #Education
round(c((table(data1$hispanic)/sum(table(data1$hispanic)))[1], table(data1$hispanic, data1$race)[2,4] / sum(table(data1$hispanic, data1$race)), table(data1$hispanic, data1$race)[2,3] / sum(table(data1$hispanic, data1$race))), digits=3) #Race/ethnicity

###MTurk data
data2$Male <- ifelse(data2$gender==1,1,ifelse(data2$gender==2,0,NA))
data2$age0 <- ifelse(data2$age>= 18 & data2$age <= 29,1,0)
data2$age1 <- ifelse(data2$age>= 30 & data2$age <= 44,1,0)
data2$age2 <- ifelse(data2$age >= 45 & data2$age <= 64,1,0)
data2$age3 <- ifelse(data2$age >=65,1,0)
data2$education2 <- ifelse(data2$education==1| data2$education==2, 1, 
                           ifelse(data2$education==3, 2,
                                  ifelse(data2$education==4 | data2$education==5, 3,
                                         ifelse(data2$education==6 | data2$education==7 | data2$education==8, 4, NA))) )

##Table 2 output:
round(table(data2$Male)/sum(table(data2$Male)),digits=3)[c(2,1)] #Gender
round(c((table(data2$age0)/sum(table(data2$age0)))[2], (table(data2$age1)/sum(table(data2$age1)))[2], (table(data2$age2)/sum(table(data2$age2)))[2], (table(data2$age3)/sum(table(data2$age3)))[2]), digits=3) #Age
round(table(data2$education2)/sum(table(data2$education2)),digits=3) #Education

### Appendix section 3, tables 3-4: Balance across treatments
## Check for sample balance across conditions

## SSI data
data1$agerange <- ifelse(data1$age0==1, 1, 
                         ifelse(data1$age1==1, 2,
                                ifelse(data1$age2==1, 3, 
                                       ifelse(data1$age3==1, 4, NA))))

##Table 3 output:

a <- table(data1$Male, data1$Strategy)
b <- apply(a,2,sum)
round(rbind(a[2,]/b, a[1]/b),digits=3) #Male
a <- table(data1$education2, data1$Strategy)
b <- apply(a,2,sum)
round(rbind(a[1,]/b, a[2,]/b, a[3,]/b, a[4,]/b),digits=3) #Education
a <- table(data1$agerange, data1$Strategy)
b <- apply(a,2,sum)
round(rbind(a[1,]/b, a[2,]/b, a[3,]/b, a[4,]/b),digits=3) #Age

## MTurk data
# Treatment counts

data2$agerange <- ifelse(data2$age0==1, 1, 
                         ifelse(data2$age1==1, 2,
                                ifelse(data2$age2==1, 3, 
                                       ifelse(data2$age3==1, 4, NA))))
##Table 4 output:
a <- table(data2$Male, data2$Strategy)
b <- apply(a,2,sum)
round(rbind(a[2,]/b, a[1]/b),digits=3) #Male
a <- table(data2$education2, data2$Strategy)
b <- apply(a,2,sum)
round(rbind(a[1,]/b, a[2,]/b, a[3,]/b, a[4,]/b),digits=3) #Education
a <- table(data2$agerange, data2$Strategy)
b <- apply(a,2,sum)
round(rbind(a[1,]/b, a[2,]/b, a[3,]/b, a[4,]/b),digits=3) #Age


## Appendix section 4, table 5:comparing military assertiveness across the two experiments

## Create mTurk full context and SSI dataframes for pooled comparison
#Use Reputation Replication 1 to clean SSI data (run lines 9-12, 22-55, 155-156)
ssi.dat <- as.data.frame(cbind(data1$AmerRep, data1$PresRep, data1$stayOut3, data1$notEngage3, data1$milAssert2))
colnames(ssi.dat) <- c("AmerRep", "PresRep", "stayOut3", "notEngage3", "milAssert2")

mturk.dat <- as.data.frame(cbind(data2$AmerRep, data2$PresRep, data2$stayOut3, data2$notEngage3, data2$milAssert2))
colnames(mturk.dat) <- c("AmerRep", "PresRep", "stayOut3", "notEngage3", "milAssert2")

#Pooled data
pool.dat <- as.data.frame(cbind(rbind(ssi.dat, mturk.dat),src=c(rep(0,nrow(ssi.dat)), rep(1,nrow(mturk.dat)))))

#p-values below are from using the same cutpoints as before
#Difference in ATEs for America's reputation
mod.a <- lm(AmerRep ~ (stayOut3 + notEngage3)*src*milAssert2,  data=pool.dat)
summary(mod.a) #model 1
mod.p <- lm(PresRep ~ (stayOut3 + notEngage3)*src*milAssert2,  data=pool.dat)
summary(mod.p) #model 4

#Difference in ATEs for doves in America's reputation
mod.a.d <- lm(AmerRep ~ (stayOut3 + notEngage3)*src,  data=subset(pool.dat, pool.dat$milAssert2==0))
summary(mod.a.d) #p < 0.758 for stay out, p < 0.498 for not engage
#Difference in ATEs for hawks in America's reputation
mod.a.h <- lm(AmerRep ~ (stayOut3 + notEngage3)*src,  data=subset(pool.dat, pool.dat$milAssert2==1))
summary(mod.a.h) #p < 0.412 for stay out, p < 0.733 for not engage
#Difference in ATEs for doves in President's reputation
mod.p.d <- lm(PresRep ~ (stayOut3 + notEngage3)*src,  data=subset(pool.dat, pool.dat$milAssert2==0))
summary(mod.p.d) #p < 0.441 for stay out, p < 0.670 for not engage
#Difference in ATEs for hawks in President's reputation
mod.p.h <- lm(PresRep ~ (stayOut3 + notEngage3)*src,  data=subset(pool.dat, pool.dat$milAssert2==1))
summary(mod.p.h) #p < 0.316 for stay out, p < 0.975 for not engage

stargazer(mod.a, mod.a.d, mod.a.h,  mod.p, mod.p.d, mod.p.h, type="latex", style="apsr",  omit.stat=c("LL", "ser", "f"), digits=3)

##
## Appendix section 4, figure 1: supplementary experiment ATEs
##

set.seed(43215)

#Now let's look for bootstrapped treatment effects
#Function for calculating bootstrapped quantities of interest
bootTreat2 <- function(dat, dv="totApprov", B=1500){
  bootResults <- matrix(NA, nrow=B, ncol=5)
  for (i in seq_len(B)){
    resample <- sample(1:nrow(dat),nrow(dat),replace=T)
    temp <- dat[resample,]
    dep.var <- eval(parse(text=paste("temp",dv,sep="$")))
    A <- mean(dep.var[which(temp$notEngage3==1)], na.rm=TRUE)
    B <- mean(dep.var[which(temp$engage3==1)], na.rm=TRUE)
    C <- mean(dep.var[which(temp$stayOut3==1)], na.rm=TRUE)
    bootResults[i,1] <- (A-C)
    bootResults[i,2] <- (B-C)
    bootResults[i,3] <- (A-B)
    bootResults[i,4] <- (B-C)/(A-C)
    bootResults[i,5] <- (A-B)/(A-C)
    drop(list())
  }
  return(list(dv=dv, boot=bootResults, Audience.Cost=mean(bootResults[,1], na.rm=TRUE), Force.Cost=mean(bootResults[,2], na.rm=TRUE), Inconsistency.Cost=mean(bootResults[,3], na.rm=TRUE), Force.Fraction=mean(bootResults[,4], na.rm=TRUE), Inconsistency.Fraction=mean(bootResults[,5], na.rm=TRUE)))
}

arep.full <- bootTreat2(data2, dv="AmerRep") 

prep.full <- bootTreat2(data2, dv="PresRep") 

data2$milAssert2 <- ifelse(data2$milAssert1 <= 0.42, 0,ifelse(data2$milAssert1 >= 0.66,1,NA)) # Create cutpoints for hawks and doves, using same cutpoints as the main analysis

## Hawks
data2.hawks <- subset(data2[data2$milAssert2==1 & is.na(data2$milAssert2)==FALSE, ])
full.hawks <- bootTreat2(data2.hawks, dv="AmerRep") 
full.hawks.prep <- bootTreat2(data2.hawks, dv="PresRep") 

## Doves
data2.doves <- subset(data2[data2$milAssert2==0 & is.na(data2$milAssert2)==FALSE, ])
full.doves <- bootTreat2(data2.doves, dv="AmerRep") 
full.doves.prep <- bootTreat2(data2.doves, dv="PresRep") 

B <- 1500

dens <- data.frame(y=c(arep.full$boot[,2], arep.full$boot[,3], full.doves$boot[,2], full.doves$boot[,3],  full.hawks$boot[,2], full.hawks$boot[,3], prep.full$boot[,2], prep.full$boot[,3], full.doves.prep$boot[,2], full.doves.prep$boot[,3], full.hawks.prep$boot[,2], full.hawks.prep$boot[,3]), dv=rep(c("America's Reputation", "President's Reputation"), each=6*B), samp=rep(rep(rep(c("Full Sample", "Doves", "Hawks"), each=B),2),each=2), Cost=rep(rep(c("Belligerence", "Inconsistency"), each=B),6))  

#Reorder levels
dens$samp <- factor(dens$samp, levels=c("Full Sample", "Doves", "Hawks"))

dev.new(height=4.5, width=11)
ggplot(dens, aes(x=y)) + geom_density(aes(fill=Cost), alpha=0.35) + facet_grid(dv ~ samp, scales="fixed") + 
  labs(x="Reputation cost treatment effect", y="Density") + theme_bw() + theme(axis.text.y=element_blank(),
                                                                               axis.ticks.y=element_blank()) + geom_vline(xintercept=0, linetype=3, alpha=0.3) + xlim(-1.2,1.8)
#Because ggplot is sometimes finicky about color settings, you can convert to black and white in any image-editing program (e.g. Adobe Illustrator). To increase legibility for the printed version, we convert the colors to RGB (76,76,76) and (232,232,232), and change the opacity to 90% and 50%, respectively.

##
## Appendix section 5, figure 2: joint distribution of president and country's reputation
##

## plot figure 2
repMat <- data.frame(Amer=rep(1:5,each=5), Pres=rep(1:5,5), Proportion=NA)
for (i in 1:nrow(repMat)){
  repMat$Proportion[i] <- length(which(data1$AmerRep==repMat$Amer[i] & data1$PresRep==repMat$Pres[i]))/nrow(data1)
}
dev.new(height=3,width=4)
#Black & White
qplot(Amer,Pres, data=repMat, geom="tile", fill=Proportion) + theme_bw()+scale_fill_gradient(low="white", high="black") + xlab("Costs to America's Reputation") + ylab("Costs to President's Reputation")

## For statistics included in the text:

#Check correlation between the reputation measures
cor(data1$AmerRep, data1$PresRep, use="complete.obs") ## r = 0.86

# Also check what percentage view them exactly the same, better, or worse
data1$AmerPres <- data1$AmerRep - data1$PresRep
table(data1$AmerPres) 
423/587 #72.1% of respondents believe the President and the country face the same reputation cost
20/587 # 3.4%  believe the reputation costs of the President and country differ by more than 1 on a 5 point scale

##
## Appendix section 6,text analysis of open-ended responses
##

## Subset data so that casualties are held constant at zero
data<- subset(data2[is.na(data2$Casualties)==TRUE | data2$Casualties==0, ])

## Clean the data so it works with STM
new.data <- data[c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert", "amerRepFree")] # America's reputation
new.data2 <- data[c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert", "presRepFree")] # President's reputation
meta.data <- data[c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert")]
docs1 <- data[c("amerRepFree")]
docs2 <- data[c("presRepFree")]
Corpus <- Corpus(VectorSource(docs1$amerRepFree))
Corpus2 <- Corpus(VectorSource(docs2$presRepFree))

## Clean Corpus
Corpus.clean <- tm_map(Corpus, stripWhitespace)
Corpus.clean <- tm_map(Corpus.clean, removePunctuation)
Corpus.clean <- tm_map(Corpus.clean, tolower)

Corpus.clean2 <- tm_map(Corpus2, stripWhitespace)
Corpus.clean2 <- tm_map(Corpus.clean2, removePunctuation)
Corpus.clean2 <- tm_map(Corpus.clean2, tolower)

dataframe<-data.frame(text=unlist(sapply(Corpus.clean, `[`)), stringsAsFactors=F)
dataframe2<-data.frame(text=unlist(sapply(Corpus.clean2, `[`)), stringsAsFactors=F)

clean.data <- cbind(meta.data, dataframe) #America's reputation free response data
clean.data2 <- cbind(meta.data, dataframe2) # President's reputation free response data
colnames(clean.data) <- c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert","documents")
colnames(clean.data2) <- c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert","documents")

## combine data so all reputation responses are included and considered the same 
clean.data3 <- rbind(clean.data, clean.data2)

## Remove offending signs that cause errors
for(i in 1:nrow(clean.data3)){
  clean.data3$documents[i] <- gsub("[^[:alnum:]///' ]", "", clean.data3$documents[i])
}

## Now create dataframes with just doves or hawks for each treatment group
quantile(clean.data3$milAssert, c(0.25, 0.75), na.rm=TRUE) ## Quartile cutpoints 2.33 and 3.33
clean.data3$milAssert1 <- ifelse(clean.data3$milAssert <= 2.34, 0,ifelse(clean.data3$milAssert >= 3.33,1,NA))

clean.data4 <- clean.data3[c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert1", "documents")] 
clean.data4 <- subset(clean.data4[is.na(clean.data4$milAssert1)==FALSE, ])

## Use "DH" to identify data for comparing doves and hawks
DH.engage <- subset(clean.data4[clean.data4$Strategy==2, ])
DH.notengage <- subset(clean.data4[clean.data4$Strategy==3, ])

## Doves versus Hawks in Engage
processed <- textProcessor(DH.engage$documents, metadata=DH.engage)

out <- prepDocuments(processed$documents, processed$vocab, processed$meta)

## Fit the structural topic model for engage
DH.engage.model <- selectModel(processed$documents, processed$vocab, K=13, 
                               prevalence = ~ milAssert1, max.em.its = 300,
                               runs=20, data = processed$meta, seed = 51111)

DH.engage.model1 <- DH.engage.model$runout[[2]]

# Plot left side of appendix section 6, figure 3 - high probability words for each topic
plot(DH.engage.model1, topics=c(6, 11, 10), type="labels", n=10) 

## Doves versus Hawks in NotEngage
processed <- textProcessor(DH.notengage$documents, metadata=DH.notengage)

out <- prepDocuments(processed$documents, processed$vocab, processed$meta)

## Fit the structural topic model
DH.notengage.model <- selectModel(processed$documents, processed$vocab, K=13, 
                                  prevalence = ~ milAssert1, max.em.its = 300,
                                  runs=20, data = processed$meta, seed = 51111)

DH.notengage.model1 <- DH.notengage.model$runout[[1]]

# Plot right side of appendix section 6, figure 3 - high probability words for each topic
plot(DH.notengage.model1, topics=c(4, 5, 11), type="labels", n=10) 

##
## Now compare results when free responses for America's and the President's reputation are analyzed seperately
##

## Begin with America's reputation

## Remove offending signs that cause errors
for(i in 1:nrow(clean.data)){
  clean.data$documents[i] <- gsub("[^[:alnum:]///' ]", "", clean.data$documents[i])
}

## Now create dataframes with just doves or hawks for each treatment group
#quantile(clean.data$milAssert, c(0.25, 0.75), na.rm=TRUE) ## Quartile cutpoints 2.33 and 3.33
clean.data$milAssert1 <- ifelse(clean.data$milAssert <= 2.34, 0,ifelse(clean.data$milAssert >= 3.33,1,NA))

clean.data5 <- clean.data[c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert1", "documents")] 
clean.data5 <- subset(clean.data[is.na(clean.data$milAssert1)==FALSE, ])

## Use "DH" to identify data for comparing doves and hawks
DH.engage <- subset(clean.data5[clean.data5$Strategy==2, ])
DH.notengage <- subset(clean.data5[clean.data5$Strategy==3, ])

## Doves versus Hawks in Engage
processed <- textProcessor(DH.engage$documents, metadata=DH.engage)

out <- prepDocuments(processed$documents, processed$vocab, processed$meta)

## Fit the structural topic model
DH.engage.model <- selectModel(processed$documents, processed$vocab, K=13, 
                               prevalence = ~ milAssert1, max.em.its = 300,
                               runs=20, data = processed$meta, seed = 51111)

DH.engage.model1 <- DH.engage.model$runout[[3]]

set.seed(43215)
DH.engage.model1.effect <-  estimateEffect(1:13 ~ milAssert1, DH.engage.model1, meta=processed$meta, uncertainty = "Global")

## Plot topics to identify those of significance (4, 6, and 11)
plot(DH.engage.model1.effect, covariate = "milAssert1", topics =1:13, method = "difference", cov.value1=1, cov.value2=0, xlab="Difference of change from Dove to Hawk", main="Effect of Doves vs Hawks in Engage", xlim=c(-.1,.1))

## "Find thoughts" for left side of Figure 6.  The Figure was manually assembled outside of R and responses were reconstituted to their original form (prior to cleaning for STM)
## The following idendifies 10 representative responses for each topic, from which the authors selected the responses displayed in the paper

#Unclear (not plotted in appendix)
engage.thoughts4 <- findThoughts(DH.engage.model1, texts=processed$meta$documents, topics=4, n=10)
engage.thoughts4
#Accepting Unpopularity
engage.thoughts6 <- findThoughts(DH.engage.model1, texts=processed$meta$documents, topics=6, n=10)
engage.thoughts6 
#Anti-Interventionism
engage.thoughts11 <- findThoughts(DH.engage.model1, texts=processed$meta$documents, topics=11, n=10)
engage.thoughts11

## Doves versus Hawks in NotEngage
processed <- textProcessor(DH.notengage$documents, metadata=DH.notengage)

out <- prepDocuments(processed$documents, processed$vocab, processed$meta)

## Fit the structural topic model
DH.notengage.model <- selectModel(processed$documents, processed$vocab, K=13, 
                                  prevalence = ~ milAssert1, max.em.its = 300,
                                  runs=20, data = processed$meta, seed = 51111)

DH.notengage.model1 <- DH.notengage.model$runout[[4]]

set.seed(43215)
DH.notengage.model1.effect <-  estimateEffect(1:13 ~ milAssert1, DH.notengage.model1, meta=processed$meta, uncertainty = "Global")

## Plot topics to identify those of significance (4, 6, 7, and 8)
plot(DH.notengage.model1.effect, covariate = "milAssert1", topics =1:13, method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Difference of change from Dove to Hawk", main="Effect of Doves vs Hawks in Engage", xlim=c(-.1,.1))

## "Find thoughts" for left side of Figure 7.  The Figure was manually assembled outside of R and responses were reconstituted to their original form (prior to cleaning for STM)
## The following idendifies 10 representative responses for each topic, from which the authors selected the responses displayed in the paper

#Inconsistency
notengage.thoughts4 <- findThoughts(DH.notengage.model1, texts=processed$meta$documents, topics=4, n=10)
notengage.thoughts4
#Anti-Interventionism
notengage.thoughts6 <- findThoughts(DH.notengage.model1, texts=processed$meta$documents, topics=6, n=10)
notengage.thoughts6
#No Impact
notengage.thoughts7 <- findThoughts(DH.notengage.model1, texts=processed$meta$documents, topics=7, n=10)
notengage.thoughts7
#Accepting Unpopularity
notengage.thoughts8 <- findThoughts(DH.notengage.model1, texts=processed$meta$documents, topics=8, n=10)
notengage.thoughts8

## Appendix Figure 4 - Side by side plot of Not Engage and Engage

par(mfrow=c(1,2))
plot(DH.engage.model1.effect, covariate = "milAssert1", topics=c(6, 11), method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Change in topical prevalence from Dove to Hawk", main="Effect of Doves vs Hawks in Engage", xlim=c(-.15,.15),  labeltype = "custom",
                    custom.labels = c('', ''))
text(.070, 2.11, "Accepting Unpopularity", cex=1)
text(-.095, 1.11, "Anti-Interventionism", cex=1)
plot(DH.notengage.model1.effect, covariate = "milAssert1", topics=c(4, 6, 7, 8), method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Change in topical prevalence from Dove to Hawk", main="Effect of Doves vs Hawks in Not Engage", xlim=c(-.15,.15),  labeltype = "custom",
                    custom.labels = c('', '', '', ''))
text(.077, 4.16, "Inconsistency", cex=1)
text(-.065, 3.16, "Anti-Interventionism", cex=1)
text(.035, 2.16, "No Impact", cex=1)
text(-.046, 1.16, "Accepting Unpopularity", cex=1)

## Now with President's reputation

## Remove offending signs that cause errors
for(i in 1:nrow(clean.data2)){
  clean.data2$documents[i] <- gsub("[^[:alnum:]///' ]", "", clean.data2$documents[i])
}

## Now create dataframes with just doves or hawks for each treatment group
#quantile(clean.data$milAssert, c(0.25, 0.75), na.rm=TRUE) ## Quartile cutpoints 2.33 and 3.33
clean.data2$milAssert1 <- ifelse(clean.data$milAssert <= 2.34, 0,ifelse(clean.data$milAssert >= 3.33,1,NA))

clean.data6 <- clean.data2[c("Territory",   "Strategy", "totApprov", "AmerRep", "PresRep", "milAssert1", "documents")] 
clean.data6 <- subset(clean.data6[is.na(clean.data$milAssert1)==FALSE, ])

## Use "DH" to identify data for comparing doves and hawks
DH.engage <- subset(clean.data6[clean.data6$Strategy==2, ])
DH.notengage <- subset(clean.data6[clean.data6$Strategy==3, ])

## Doves versus Hawks in Engage
processed <- textProcessor(DH.engage$documents, metadata=DH.engage)

out <- prepDocuments(processed$documents, processed$vocab, processed$meta)

## Fit the structural topic model
DH.engage.model <- selectModel(processed$documents, processed$vocab, K=13, 
                               prevalence = ~ milAssert1, max.em.its = 300,
                               runs=20, data = processed$meta, seed = 51111)

DH.engage.model1 <- DH.engage.model$runout[[1]]

set.seed(43215)
DH.engage.model1.effect <-  estimateEffect(1:13 ~ milAssert1, DH.engage.model1, meta=processed$meta, uncertainty = "Global")

## Plot topics to identify those of significance (1, 2, and 13)
plot(DH.engage.model1.effect, covariate = "milAssert1", topics =1:13, method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Difference of change from Dove to Hawk", main="Effect of Doves vs Hawks in Engage", xlim=c(-.1,.1))

## "Find thoughts" for right side of Figure 6.  The Figure was manually assembled outside of R and responses were reconstituted to their original form (prior to cleaning for STM)
## The following idendifies 10 representative responses for each topic, from which the authors selected the responses displayed in the paper

#No Impact
engage.thoughts1 <- findThoughts(DH.engage.model1, texts=processed$meta$documents, topics=1, n=10)
engage.thoughts1
#Anti-Interventionism
engage.thoughts2 <- findThoughts(DH.engage.model1, texts=processed$meta$documents, topics=2, n=10)
engage.thoughts2 
#Unclear (not plotted in appendix)
engage.thoughts13 <- findThoughts(DH.engage.model1, texts=processed$meta$documents, topics=13, n=10)
engage.thoughts13


## Doves versus Hawks in NotEngage
processed <- textProcessor(DH.notengage$documents, metadata=DH.notengage)

out <- prepDocuments(processed$documents, processed$vocab, processed$meta)

## Fit the structural topic model
DH.notengage.model <- selectModel(processed$documents, processed$vocab, K=13, 
                                  prevalence = ~ milAssert1, max.em.its = 300,
                                  runs=20, data = processed$meta, seed = 51111)

DH.notengage.model1 <- DH.notengage.model$runout[[1]]

set.seed(43215)
DH.notengage.model1.effect <-  estimateEffect(1:13 ~ milAssert1, DH.notengage.model1, meta=processed$meta, uncertainty = "Global")

## Plot topics to identify those of significance (10)
plot(DH.notengage.model1.effect, covariate = "milAssert1", topics =1:13, method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Difference of change from Dove to Hawk", main="Effect of Doves vs Hawks in Engage", xlim=c(-.1,.1))

## "Find thoughts" for right side of Figure 6.  The Figure was manually assembled outside of R and responses were reconstituted to their original form (prior to cleaning for STM)
## The following idendifies 10 representative responses for each topic, from which the authors selected the responses displayed in the paper

#Partisan Justification
notengage.thoughts10 <- findThoughts(DH.notengage.model1, texts=processed$meta$documents, topics=10, n=10)
notengage.thoughts10

## Appendix Figure 5 - Side by side plot of Not Engage and Engage

par(mfrow=c(1,2))
plot(DH.engage.model1.effect, covariate = "milAssert1", topics=c(1, 2), method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Change in topical prevalence from Dove to Hawk", main="Effect of Doves vs Hawks in Engage", xlim=c(-.2,.2),  labeltype = "custom",
                    custom.labels = c('', ''))
text(-.115, 2.15, "No Impact", cex=1)
text(-.125, 1.15, "Anti-Interventionism", cex=1)
plot(DH.notengage.model1.effect, covariate = "milAssert1", topics=c(10), method = "difference", cov.value1=1, cov.value2=0, 
                    xlab="Change in topical prevalence from Dove to Hawk", main="Effect of Doves vs Hawks in Not Engage", xlim=c(-.2,.2),  labeltype = "custom",
                    custom.labels = c('', ''))
text(-.05, 1.1, "Partisan Justification", cex=1)

##
## Appendix section 7, mediation analysis
##

set.seed(43215)

#Isolate belligerence and inconsistency treatments, and subset based on hawks versus doves
dat.bel <- subset(data1, data1$notEngage3==0)
dat.inc <- subset(data1, data1$stayOut3==0)
dat.bel.dove <- subset(dat.bel, dat.bel$milAssert2==0)
dat.bel.hawk <- subset(dat.bel, dat.bel$milAssert2==1)
dat.inc.dove <- subset(dat.inc, dat.inc$milAssert2==0)
dat.inc.hawk <- subset(dat.inc, dat.inc$milAssert2==1)

## Figure 8, panel a, left side 
#Full Sample President's Reputation - Belligerence
prep.med <- lm(PresRep ~ engage3 + education2 + income + Male + ideology, data=dat.bel)
prep.dv <- lm(totApprov ~ PresRep + engage3 + education2 + income + Male + ideology, data=dat.bel)
prep.med.b <- mediate(prep.med, prep.dv, treat="engage3", mediator="PresRep", sims=1500)

## Figure 8, panel a, right side 
#Full Sample President's Reputation - Inconsistency
prep.med <- lm(PresRep ~ notEngage3 + education2 + income + Male + ideology, data=dat.inc)
prep.dv <- lm(totApprov ~ PresRep + notEngage3 + education2 + income + Male + ideology, data=dat.inc)
prep.med.i <- mediate(prep.med, prep.dv, treat="notEngage3", mediator="PresRep", sims=1500)

# Plot appendix figure 8a
par(mfrow=c(1,2))
plot(prep.med.b, main="Belligerence")
plot(prep.med.i, main="Inconsistency")

## Figure 8, panel b, left side 
#Full Sample Country's Reputation - Belligerence
arep.med <- lm(AmerRep ~ engage3 + education2 + income + Male + ideology, data=dat.bel)
arep.dv <- lm(totApprov ~ AmerRep + engage3 + education2 + income + Male + ideology, data=dat.bel)
arep.med.b <- mediate(arep.med, arep.dv, treat="engage3", mediator="AmerRep", sims=1500)
plot(arep.med.b)

## Figure 8, panel b, right side 
#Full Sample Country's Reputation - Inconsistency
arep.med <- lm(AmerRep ~ notEngage3 + education2 + income + Male + ideology, data=dat.inc)
arep.dv <- lm(totApprov ~ AmerRep + notEngage3 + education2 + income + Male + ideology, data=dat.inc)
arep.med.i <- mediate(arep.med, arep.dv, treat="notEngage3", mediator="AmerRep", sims=1500)
plot(arep.med.i)

# Plot appendix figure 8b
par(mfrow=c(1,2))
plot(arep.med.b, main="Belligerence")
plot(arep.med.i, main="Inconsistency")

##
## figure 9, president's reputation: moderated mediation results
##
set.seed(43215)

## President's Reputation - Belligerence - Doves
prep.med <- lm(PresRep ~ engage3 + education2 + income + Male + ideology, data=dat.bel.dove)
prep.dv <- lm(totApprov ~ PresRep + engage3 + education2 + income + Male + ideology, data=dat.bel.dove)
prep.med.b.d <- mediate(prep.med, prep.dv, treat="engage3", mediator="PresRep", sims=1500)

## President's Reputation - Belligerence - Hawks
prep.med <- lm(PresRep ~ engage3 + education2 + income + Male + ideology, data=dat.bel.hawk)
prep.dv <- lm(totApprov ~ PresRep + engage3 + education2 + income + Male + ideology, data=dat.bel.hawk)
prep.med.b.h <- mediate(prep.med, prep.dv, treat="engage3", mediator="PresRep", sims=1500)

## President's Reputation - Inconsistency - Doves
prep.med <- lm(PresRep ~ notEngage3 + education2 + income + Male + ideology, data=dat.inc.dove)
prep.dv <- lm(totApprov ~ PresRep + notEngage3 + education2 + income + Male + ideology, data=dat.inc.dove)
prep.med.i.d <- mediate(prep.med, prep.dv, treat="notEngage3", mediator="PresRep", sims=1500)

## President's Reputation - Inconsistency - Hawks
prep.med <- lm(PresRep ~ notEngage3 + education2 + income + Male + ideology, data=dat.inc.hawk)
prep.dv <- lm(totApprov ~ PresRep + notEngage3 + education2 + income + Male + ideology, data=dat.inc.hawk)
prep.med.i.h <- mediate(prep.med, prep.dv, treat="notEngage3", mediator="PresRep", sims=1500)

# Belligerence values - President's Reputation
points.b.d <- c(prep.med.b.d$tau.coef , prep.med.b.d$z.avg, prep.med.b.d$d.avg)
b.d.CI.low <- c(prep.med.b.d$tau.ci[1],prep.med.b.d$z.avg.ci[1],prep.med.b.d$d.avg.ci[1])
b.d.CI.high <- c(prep.med.b.d$tau.ci[2],prep.med.b.d$z.avg.ci[2],prep.med.b.d$d.avg.ci[2]) 
points.b.h <- c(prep.med.b.h$tau.coef , prep.med.b.h$z.avg, prep.med.b.h$d.avg)
b.h.CI.low <- c(prep.med.b.h$tau.ci[1], prep.med.b.h$z.avg.ci[1], prep.med.b.h$d.avg.ci[1])
b.h.CI.high <- c(prep.med.b.h$tau.ci[2],prep.med.b.h$z.avg.ci[2], prep.med.b.h$d.avg.ci[2]) 
labels <- c("Total Effect", "ADE", "ACME")

# Inconsistency values - President's Reputation
points.i.d <- c(prep.med.i.d$tau.coef , prep.med.i.d$z.avg, prep.med.i.d$d.avg)
i.d.CI.low <- c(prep.med.i.d$tau.ci[1],prep.med.i.d$z.avg.ci[1],prep.med.i.d$d.avg.ci[1])
i.d.CI.high <- c(prep.med.i.d$tau.ci[2],prep.med.i.d$z.avg.ci[2],prep.med.i.d$d.avg.ci[2]) 
points.i.h <- c(prep.med.i.h$tau.coef , prep.med.i.h$z.avg, prep.med.i.h$d.avg)
i.h.CI.low <- c(prep.med.i.h$tau.ci[1], prep.med.i.h$z.avg.ci[1], prep.med.i.h$d.avg.ci[1])
i.h.CI.high <- c(prep.med.i.h$tau.ci[2],prep.med.i.h$z.avg.ci[2], prep.med.i.h$d.avg.ci[2]) 
labels <- c("Total Effect", "ADE", "ACME")

temp <- data.frame(X=c(points.b.d, points.b.h, points.i.d, points.i.h), Low=c(b.d.CI.low, b.h.CI.low, i.d.CI.low, i.h.CI.low), High=c(b.d.CI.high, b.h.CI.high, i.d.CI.high, i.h.CI.high), Cost=(rep(c("Belligerence", "Inconsistency"), each=length(points.b.d)*2)), Group=rep(c("Doves", "Hawks"), each=3), Z=(rep(c("Total", "ADE", "ACME"),4)))

ggplot(data=temp, aes(x=X, y=Group)) + geom_point() + geom_segment(aes(x=Low, y=Group, xend=High, yend=Group)) + facet_grid(Z~Cost) + theme_bw() + labs(x="Effect", y="") + geom_vline(xintercept=0, linetype=3)

##### Now, calculate formal moderated mediation p-values, which were manually added to Figure 9

#Belligerence costs, President's reputation
prep.mod.med <- lm(PresRep ~ engage3*milAssert2 + education2 + income + Male + ideology, data=dat.bel)
prep.mod.dv <- lm(totApprov ~ engage3*milAssert2 + PresRep + education2 + income + Male + ideology, data=dat.bel)
mod.med <- mediate(prep.mod.med, prep.mod.dv, sims=1000, boot=FALSE, treat="engage3", mediator="PresRep", data=dat.bel)
modmed <- test.modmed(mod.med, covariates.1=list(milAssert2=1), covariates.2=list(milAssert2=0), sims=1000)
modmed #p < 0.026 for ACME, p < 0.010 for ADE

#Inconsistency costs, President's reputation
prep.mod.med <- lm(PresRep ~ notEngage3*milAssert2 + education2 + income + Male + ideology, data=dat.inc)
prep.mod.dv <- lm(totApprov ~ notEngage3*milAssert2 + PresRep + education2 + income + Male + ideology, data=dat.inc)
mod.med <- mediate(prep.mod.med, prep.mod.dv, sims=1000, boot=FALSE, treat="notEngage3", mediator="PresRep", data=dat.inc)
modmed <- test.modmed(mod.med, covariates.1=list(milAssert2=1), covariates.2=list(milAssert2=0), sims=1000)
modmed #p < 0.114 for ACME, p < 0.11 for ADE

#To calculate whether total effects significantly differ from each other:

#Belligerence, President's reputation 
prep.mod.tot <- lm(totApprov ~ engage3*milAssert2 + education2 + income + Male + ideology, data=dat.bel)
summary(prep.mod.tot) #p < 0.001

#Inconsistency, President's reputation
prep.mod.tot <- lm(totApprov ~ notEngage3*milAssert2 + education2 + income + Male + ideology, data=dat.inc)
summary(prep.mod.tot) #p < 0.021
